3. Les parlementaires sur le réseau social Twitter

3.1. Analyse des données textuelles

import pandas as pd
from lib.figures import *
from lib.constant import *
from lib.utils import *

from bokeh.io import output_notebook
# Send Bokeh output to the notebook; hide_banner=True suppresses the
# loading banner.
output_notebook(hide_banner=True)

# Load the tweet dataset used by the rest of this section.
twitter_df = pd.read_parquet(
    'data/twitter_fev_to_juin_2023_retraite_data.parquet'
)

# Last expression of the cell: its return value is what the notebook displays.
intervention_frequency_per_group(twitter_df)
# (Code cell below: its source is collapsed by default in the rendered page.)
from bokeh.models import TabPanel, Tabs

# Count table built from the tweets with top_n=10.
# NOTE(review): presumably the 10 most frequent expressions per group --
# confirm against getCountDataframe in lib.
df = getCountDataframe(twitter_df, top_n=10)

# One tab per value of `num_words` (1, 2 and 3), each holding the
# per-political-group distribution figure for that subset.
tab1, tab2, tab3 = (
    TabPanel(
        child=occurrenceDistributionPerGroupePolitique(df[df.num_words == n_words]),
        title=tab_title,
    )
    for n_words, tab_title in ((1, "1 mot"), (2, "2 mots"), (3, "3 mots"))
)

show(Tabs(tabs=[tab1, tab2, tab3], sizing_mode="stretch_width"))
from bokeh.models import TabPanel, Tabs

# Count table built from the tweets with top_n=10 (recomputed here so this
# cell can run on its own).
# NOTE(review): presumably the 10 most frequent expressions -- confirm
# against getCountDataframe in lib.
df = getCountDataframe(twitter_df, top_n=10)

# One tab per value of `num_words` (1, 2 and 3), each holding the
# per-political-orientation distribution figure for that subset.
tab1, tab2, tab3 = (
    TabPanel(
        child=occurrenceDistributionPerPolitiqueOrientation(df[df.num_words == n_words]),
        title=tab_title,
    )
    for n_words, tab_title in ((1, "1 mot"), (2, "2 mots"), (3, "3 mots"))
)

show(Tabs(tabs=[tab1, tab2, tab3], sizing_mode="stretch_width"))

3.2. Network Data

# Reload the raw tweets so this section does not depend on earlier cells.
twitter_df = pd.read_parquet('data/twitter_fev_to_juin_2023_retraite_data.parquet')

# Keep only retweets, i.e. rows that carry a retweet_id.
# `Series == None` is evaluated element-wise and is False for every row, so
# negating it kept the whole frame; notna() is the correct missing-value test.
twitter_df = twitter_df[twitter_df.retweet_id.notna()]

# Keep only rows flagged by `is_keywords`. The explicit copy makes the column
# assignment below act on an independent frame instead of a slice
# (avoids SettingWithCopyWarning).
twitter_df = twitter_df[twitter_df.is_keywords].copy()

# Deputy reference table; `slug` is matched against the tweets' `username`
# and mapped to the `twitter` column (the deputy's Twitter handle).
deputy_df = pd.read_csv("data/2022_now_deputy.csv", sep=";")
slug2twitterat = dict(deputy_df[["slug", "twitter"]].values)

# Usernames without an entry in the deputy table get NaN here.
twitter_df["twitter_at"] = twitter_df.username.map(slug2twitterat)
twitter_df.head(2)
username full_text date in_reply_to_screen_name in_reply_to_status_id_str in_reply_to_user_id_str retweet_id retweet_username retweet_user_id is_quote_status quoted_status_id_str groupe_sigle hashtag is_hashtag lemmatization keywords_detected is_keywords twitter_at
464 jean-luc-fugit RT : La réforme des retraites soulève la quest... 2023-02-01 00:00:59+00:00 None None None 1620569649967681542 StanGuerini 1911591212 False None REN [#retraites] True rt : le réforme de retraite soulever le questi... [retraite, retrait, réforme, réforme de retrai... True Jean_LucFUGIT
453 laure-lavalette RT : . (RN) interpelle (LFI) : "On ne comprend... 2023-02-01 00:19:24+00:00 None None None 1620535420223213569 LCP 85362553 False None RN [] False rt : . ( RN ) interpelle ( LFI ) : " on ne com... [obstruction, majorité] True LaureLavalette
# Attach a display colour to each deputy by mapping the group acronym
# through gp_politique_color.
deputy_df["color"] = deputy_df.groupe_sigle.map(gp_politique_color)

# Lookup tables keyed by the `twitter` column:
#   dep2color -> colour, dep2sigle -> group acronym.
dep2color = dict(deputy_df[["twitter", "color"]].to_numpy())
dep2sigle = dict(deputy_df[["twitter", "groupe_sigle"]].to_numpy())
def color(node):
    """Return the colour registered for *node* in ``dep2color``.

    Nodes absent from ``dep2color`` get the neutral grey ``"#aaa"``.
    """
    return dep2color.get(node, "#aaa")

def gp_legend(node):
    """Return the group acronym registered for *node* in ``dep2sigle``.

    Nodes absent from ``dep2sigle`` are labelled ``"NA"``.
    """
    return dep2sigle.get(node, "NA")
import networkx as nx

# Edge list of the retweet network: deputy handle -> retweeted account.
graph_df = twitter_df["twitter_at retweet_username groupe_sigle".split()]

# Drop rows with a missing endpoint BEFORE casting to str. After astype(str)
# missing values become the literal strings "nan"/"None", which isna() cannot
# detect; and indexing a DataFrame with a boolean DataFrame only masks cells,
# it never drops rows. The previous filter therefore did nothing and every
# deputy without a known handle collapsed into one fake "nan" node.
graph_df = graph_df.dropna(subset=["twitter_at", "retweet_username"]).astype(str)

# Optional: restrict to retweets between deputies by also filtering with
# graph_df.retweet_username.isin(deputy_df.twitter.values)

# Collapse repeated (source, target) pairs into one edge; the resulting `size`
# column is the number of rows for that pair.
graph_df = graph_df.groupby(["twitter_at", "retweet_username"], as_index=False).size()
G = nx.from_pandas_edgelist(
    graph_df,
    source="twitter_at",
    target="retweet_username",
    edge_attr="size",
    create_using=nx.DiGraph,
)

# Prune weakly connected nodes (total degree < 4). Iterate over a snapshot
# because nodes are removed during the loop.
# NOTE(review): removing a node lowers its neighbours' degree, so the outcome
# depends on iteration order and one pass does not guarantee that every
# remaining node has degree >= 4 -- confirm this is the intended behaviour.
for node in list(G.nodes()):
    if G.degree(node) < 4:
        G.remove_node(node)

# Safety net: a stringified missing value must never appear as a node.
if "None" in G:
    G.remove_node("None")
from ipysigma import Sigma,SigmaGrid

# Per-node centrality scores, used below to size nodes and labels.
betweeness = nx.betweenness_centrality(G)
page_rank = nx.pagerank(G)

# Rendering options common to the three views: nodes coloured by political
# group (dep2sigle values looked up in the gp_politique_color palette).
_shared_view_options = dict(
    node_color=dep2sigle,
    default_node_border_color="#ffffff",
    node_color_palette=gp_politique_color,
    node_size_range=[3, 20],
    start_layout=10,
    default_edge_type="curve",
    label_font="Arial",
)

# View 1: node size = in-degree, label size = total degree, thinner edges.
_in_degree_view = dict(
    _shared_view_options,
    name="In Degree",
    node_size=G.in_degree,
    node_label_size=G.degree,
    edge_size_range=[0.1, 1],
)

# View 2: node and label size = betweenness centrality.
_betweenness_view = dict(
    _shared_view_options,
    name="Betweeness",
    node_size=lambda x: betweeness[x],
    node_label_size=lambda x: betweeness[x],
    edge_size_range=[1, 5],
)

# View 3: node and label size = PageRank score.
_page_rank_view = dict(
    _shared_view_options,
    name="Page Rank",
    node_size=lambda x: page_rank[x],
    node_label_size=lambda x: page_rank[x],
    edge_size_range=[1, 5],
)

# The chained expression is the last statement of the cell, so its value is
# what the notebook displays.
SigmaGrid(G, hide_search=False, columns=2) \
    .add(**_in_degree_view) \
    .add(**_betweenness_view) \
    .add(**_page_rank_view)